1 Packages

# Tidy data
library(tidyverse)
library(lubridate)

# Supplementary analysis
library(survival)
library(TraMineR)
library(rstatix)
library(epiR)

# Data
library(ISLR)

# plotly
library(plotly)

# ggplot2 and addins
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(gridExtra)
library(ggseqplot)
library(ggh4x)
library(ggstatsplot)

# Other graphics tools
library(treemap)

2 Basic plots

2.1 Barplots

2.1.1 Frequencies

# Simulate data for a barplot
dataBarplot <- data.frame(
  Episode = rep(c("First Covid-19 episode", "Last Covid-19 episode"), each = 63), # Two Covid episodes
  Time = factor(rep(rep(c("Acute phase", "3 months or more", "Still persisting"), each = 21), 2),
                levels = c("Acute phase", "3 months or more", "Still persisting")), # Symptoms last for a given time
  Symptoms = rep(c("Abdominal pain", "Anxiety", "Chest pain", "Cognitive disfunction", "Cough", # List of possible symptoms
                          "Depression", "Dizziness", "Fatigue", "Fever", "Gastrointestinal problem",
                          "Headache", "Joint pain", "Loss of smell or taste", "Menstruation issue", "Neuralgia",
                          "New allergies", "Shortness of breath", "Sleeping disorder", "Tachycardia", "Tinnitus",
                          "Troubled vision"), 6),
    N = c(0, 0, 0, 0, 2, 5, 9, 285, 404, 714, 646, 870, 765, 1873, 2632, 2106, 3181, 3844, # Number of cases
          3985, 4921, 3529, 0, 0, 0, 0, 0, 0, 3, 38,  51,  49,  82, 116, 119, 287, 142, 448, 
          190, 336, 158,  81, 502, 0, 1, 4, 6,  12,  12, 6, 315, 243, 192, 232, 212, 930, 394, 
          394, 902, 781, 351, 460,  92,  1318, 0, 0, 0, 0, 0, 0, 0,  71,  62, 119, 148, 162, 
          217, 123, 427, 347, 520, 669, 825, 730, 574, 0, 0, 0, 0, 1, 0, 0, 3, 7,  13, 7,  10,  
          22, 8,  13,  40,  15,  34,  18,  17,  36, 0, 1, 1, 2, 1, 2, 2, 61, 67, 59, 51, 58, 85, 
          217, 84, 200, 172, 79, 18, 115, 296)) %>%
  # Total number of cases per symptoms and episode
  mutate(Ntot = c(rep(by(N[Episode == "First Covid-19 episode"], Symptoms[Episode == "First Covid-19 episode"], sum), 3),
                  rep(by(N[Episode == "Last Covid-19 episode"], Symptoms[Episode == "Last Covid-19 episode"], sum), 3)))



# Initialize the plot with the dataset (DATA_ex2_1)
ggplot(dataBarplot) + 
  # Add a stacked bar chart (geom_col creates bars with heights determined by 'N')
  geom_col(
    mapping = aes(N, Symptoms, fill = Time), # Map 'N' to x, 'Symptome' to y, and 'Temps' to fill color
    width = 0.6,                              # Set the bar width
    position = "stack"                        # Stack bars by the 'fill' variable
  ) +
  # Create facets (separate panels) for each level of the 'Episode' variable
  facet_grid(cols = vars(Episode))  +
  # Add a vertical line at x = 0 for reference
  geom_vline(aes(xintercept = 0)) + 
  # Customize the x-axis scale
  scale_x_continuous(
    limits = c(0, 6500),                     # Set the range for the x-axis
    breaks = seq(0, 6000, 1000),             # Major tick marks every 1000 units
    minor_breaks = seq(500, 6500, 500),      # Minor tick marks every 500 units
    expand = c(0, 0)                         # Remove padding on the x-axis
  ) + 
  scale_fill_manual(
    name = "Symptoms duration",
    values = c('#28AFB0',  '#F6803C',  '#82354F')
  ) +
  # Add text labels to display 'Ntot' values next to each bar
  geom_text(
    mapping = aes(Ntot + 25, y = Symptoms, label = Ntot), # Position and content of the text
    hjust = 0,                                            # Align text to the left of its position
    nudge_x = 1                                           # Slightly nudge text horizontally
  ) +
  # Add titles and subtitles (empty here for now)
  labs(title = "", subtitle = "") + 
  # Customize the overall appearance of the plot
  theme(
    axis.title = element_blank(),                      # Remove axis titles
    panel.background = element_rect(fill = "white"),   # Set panel background to white
    panel.grid.major.x = element_line(color = "darkgray"), # Major grid lines for x-axis
    panel.grid.minor.x = element_line(colour = "lightgray"), # Minor grid lines for x-axis
    legend.position = "bottom"
  )

2.1.2 Proportions

# Simulate data for a bar plot
dataBarplot <- data.frame(
  # Create a "Status" column with repeated categories (12 repetitions for each category)
  Status = rep(c("Family member", "Active worker", "Retired worker"), each = 12),
  # Create an "Affection" column, categorizing Covid-19 status, with ordered factor levels
  Affection = factor(
    rep(rep(c("No Covid-19", "Covid-19", "Long Covid-19"), each = 4), 3),
    levels = c("No Covid-19", "Covid-19", "Long Covid-19")
  ),
  # Create a "Social_satisfaction" column, representing satisfaction levels, with ordered factor levels
  Social_satisfaction = factor(
    rep(c("Very satisfied", "Satisfied", "Unsatisfied", "Very unsatisfied"), 9),
    levels = c("Very satisfied", "Satisfied", "Unsatisfied", "Very unsatisfied")
  ),
  # Define a numeric vector "N" representing counts corresponding to the combinations above
  N = c(65, 255, 70, 16, 38, 131, 41, 7, 21, 123, 59, 10,
        819, 2532, 591, 130, 640, 1723, 357, 38, 235, 1134, 468, 68,
        1444, 4212, 677, 128, 528, 1198, 164, 20, 160, 575, 166, 27)
) %>%
  # Add new columns using dplyr's mutate function
  mutate(
    # Compute the total count (Ntot) for each "Affection" group within each "Status" group
    Ntot = c(
      # For "Family member", calculate totals per "Affection" group
      rep(by(N[Status == "Family member"], Affection[Status == "Family member"], sum), each = 4),
      # For "Active worker", calculate totals per "Affection" group
      rep(by(N[Status == "Active worker"], Affection[Status == "Active worker"], sum), each = 4),
      # For "Retired worker", calculate totals per "Affection" group
      rep(by(N[Status == "Retired worker"], Affection[Status == "Retired worker"], sum), each = 4)
    ),
    # Calculate percentages (P) for each count (N) relative to the total count (Ntot) per group
    P = N / Ntot * 100
  )





# Initialize the plot with the dataset (DATA_ex2_2)
ggplot(dataBarplot) + 
  # Add a stacked bar chart
  geom_col(
    mapping = aes(P, Affection, fill = Social_satisfaction), # Map 'P' to x, 'COVID_long_3mod' to y, and 'Q_E1' to fill
    width = 0.6,                                   # Set the bar width
    position = "stack",                            # Stack bars by the 'fill' variable
    color = "white"                                # Add white border to bars
  ) +
  # Create facets (separate rows) for each level of the Status variable
  facet_grid(rows = vars(Status)) +
  # Add vertical lines at x = 0 and x = 100 for reference
  geom_vline(aes(xintercept = 0)) + 
  geom_vline(aes(xintercept = 100)) + 
  # Customize the x-axis scale
  scale_x_continuous(
    limits = c(0, 100.2),                        # Set the range for the x-axis
    breaks = seq(0, 100, 10),                    # Major tick marks every 10%
    minor_breaks = seq(5, 95, 10),              # Minor tick marks every 5%
    expand = c(0, 0),                            # Remove padding on the x-axis
    labels = paste0(seq(0, 100, 10), "%")       # Append "%" to the tick labels
  ) + 
  # Add titles and subtitles (empty here for now)
  labs(title = " ", subtitle = "") + 
  # Customize the overall appearance of the plot
  theme(
    axis.title = element_blank(),                      # Remove axis titles
    # axis.text.x = element_blank(),                  # Uncomment to hide x-axis labels
    # axis.text.y = element_blank(),                  # Uncomment to hide y-axis labels
    panel.background = element_rect(fill = "white"),   # Set panel background to white
    panel.grid.major.x = element_line(color = "darkgray"), # Major grid lines for x-axis
    panel.grid.minor.x = element_line(colour = "lightgray") # Minor grid lines for x-axis
  ) +
  # Customize the fill legend for the bar chart
  scale_fill_manual(
    "Social interactions",                               # Title for the legend
    values = rev(RColorBrewer::brewer.pal(4, "RdYlGn")), # Reverse color palette
    labels = c(                                        # Define labels for legend items
      "Very satisfied",                           # Very satisfactory
      "Satisfied",                         # Somewhat satisfactory
      "Unsatisfied",                       # Somewhat unsatisfactory
      "Very unsatisfied"                     # Very unsatisfactory
    )
  )

dataBarplot <- data.frame(
  LTI = factor(rep(
    c("Leukaemia", "Diabetes (insipidus & mellitus)", "Epilepsy", "Multiple Sclerosis", "Parkinson & affiliated", "Mental Illness"), 5),
    levels = c("Leukaemia", "Diabetes (insipidus & mellitus)", "Epilepsy", "Multiple Sclerosis", "Parkinson & affiliated", "Mental Illness")),
  Age = factor(rep(
    c("Under 18", "18 to 24 years old", "25 to 44 years old", "45 to 59 years old", "Over 60"), each = 6),
    levels = c("Under 18", "18 to 24 years old", "25 to 44 years old", "45 to 59 years old", "Over 60")),
  N = sample(100:300, 30)
) %>% 
  mutate(
    percent = as.numeric(unlist(by(N, LTI, function(x) {x/sum(x)*100})))[rep(seq(1, 26, 5), 5) + rep(0:4, each = 6)],
    lab = ifelse(percent > 2, paste0(formatC(percent, 1, format = "f"), "%"), ""))


x_text <- rep(NA, nrow(dataBarplot))
for (i in unique(dataBarplot$LTI)) {
  for (j in 1:sum(dataBarplot$LTI == i)) {
    ifelse(j == 1,
           x_text[which(dataBarplot$LTI == i)[j]] <- dataBarplot$percent[dataBarplot$LTI == i][j]/2,
          x_text[which(dataBarplot$LTI == i)[j]] <- sum(dataBarplot$percent[dataBarplot$LTI == i][1:(j-1)]) +
                          dataBarplot$percent[dataBarplot$LTI == i][j]/2)
  }
}
dataBarplot$x_text <- 100 - x_text
 
 
ggplot(data = dataBarplot) +
  geom_col(
    mapping = aes(x = LTI, y = percent, fill = Age),
    width = 0.6,
    position = "stack",
    color = "white"
  ) +
  geom_text(aes(x = LTI,
                y = x_text,
                label = lab),
            size = 3,
            colour = "white") +
  labs(title = "Long term illnesses per age group") +
  ylab("Proportion") +
  xlab("") +
  scale_y_continuous(
    breaks = seq(0, 100, 25),
    labels = c("0%", "25%", "50%", "75%", "100%")
  ) +
  scale_fill_manual(
    values = c('#1E5471',  '#28AFB0',  '#F6803C',  '#82354F' , '#E0D6FF'),
    name = "Classe ATC1"
  ) +
  theme_minimal() +
  theme(legend.position = "right",
        axis.text.x = element_text(
          size = 10,
          angle = 45, 
          vjust = 1, 
          hjust = 1
        ))

2.1.3 Incidence rates

ymax <- 200
coeff <- 70/200


dataBarplot <- data.frame(
  year = 2012:2022,
  n_cases = c(120, 87, 78, 96, 117, 153, 139, 164, 181, 188, 163),
  pop = rep(300000, 11)
) %>%
  mutate(
    IR = n_cases / pop * 100000,
    IR_lower = epi.conf(cbind(n_cases, pop), ctype = "inc.rate", method = "exact")[, 2] * 100000,
    IR_upper = epi.conf(cbind(n_cases, pop), ctype = "inc.rate", method = "exact")[, 3] * 100000
  )


ggplot(dataBarplot,
       aes(
         x = year,
         y = n_cases,
         width = 0.95
       )) +
  geom_col(
    mapping = aes( fill = "Incidence"),
    fill = "#AAC0AF" # couleur des barres
  ) +
  geom_line(
    aes(
      x = year,
      y = IR,
      colour = "Incidence rate" # Nmo dans la legende
    ),
    linetype = 1,
    size = 1,
    colour = "black"
  ) + 
  geom_errorbar(
    aes(
      x = year,
      ymin = IR_lower,
      ymax = IR_upper,
      width = 0.2
    )
  ) +# Separation de l'axe y
  scale_y_continuous(
    expand = c(0,0),
    limits = c(0, ymax),
    breaks = seq(0, ymax, 25),
    name = "Incidence",
    sec.axis = sec_axis(
      trans = ~.*coeff,
      name = "Incidence rate",
      breaks = seq(0, ymax*coeff, 10)
    )
  ) + 
  scale_x_continuous(
    breaks = 2012:2022,
    labels = 2012:2022,
    name = ""
  ) +
  theme_classic() + 
  theme(
    panel.grid.major.y = element_line(colour = "grey"),
    panel.grid.minor.y = element_line(colour = "lightgray"),
    axis.text.x = element_text(size = 11),
    axis.line.y.left = element_line(color = "#AAC0AF"),
    axis.title.y.left = element_text(color = "#AAC0AF"),
    axis.text.y.left = element_text(color = "#AAC0AF")
  ) + # Titres
  labs(
    title = "",
    fill = "",
    x = " ",
    y = " "
  ) 

2.2 Boxplot

2.2.1 Classic

# Create a data frame for boxplot visualization
dataBoxplot <- data.frame(
  med_id = rep(1:8, c(8, 3, 3, 5, 7, 6, 4, 11)),  # Assign medical operator IDs, repeated based on surgery counts
  op_id = c(1:8, 1:3, 1:3, 1:5, 1:7, 1:6, 1:4, 1:11),  # Assign operation IDs for each surgery
  setup = pmax(10 - rpois(47, lambda = 4), 1)  # Simulate 'setup' scores, capped at 10 and minimum of 1
)

# Create a data frame for the line plot (average setup values per operation ID)
dataLine <- data.frame(
  op_id = 1:max(dataBoxplot$op_id),  # Sequence of operation IDs
  setup = as.numeric(by(dataBoxplot$setup, dataBoxplot$op_id, mean))  # Compute mean 'setup' scores by op_id
)

# Generate the plot using ggplot2
ggplot(dataBoxplot) +
  # Add a boxplot layer for setup scores grouped by operation ID
  geom_boxplot(
    aes(group = op_id, x = op_id, y = setup)  # Map operation ID and setup scores to boxplot
  ) +
  # Add a red line connecting the mean setup values for each operation ID
  geom_line(
    data = dataLine,          # Use the data frame with mean setup values
    aes(op_id, setup),        # Map operation ID to x and mean setup to y
    color = "red"             # Set line color to red
  ) +
  # Add red points for the mean setup values
  geom_point(
    data = dataLine,          # Use the data frame with mean setup values
    aes(op_id, setup),        # Map operation ID to x and mean setup to y
    color = "red",            # Set point color to red
    shape = 16                # Use solid circle for points
  ) +
  # Add a subtitle with the p-value from a linear model (setup ~ op_id)
  labs(subtitle = paste0("p=", round(summary(lm(setup ~ op_id, dataBoxplot))$coefficient[2, 4], 4))) +
  # Label the y-axis
  ylab("Surgery setup grade (out of 10)") +
  # Label the x-axis
  xlab("Number of surgeries") +
  # Customize the y-axis scale
  scale_y_continuous(
    limits = c(0, 10),         # Set y-axis limits between 0 and 10
    breaks = 0:10,             # Add breaks at each integer value
    expand = c(0, 0)           # Remove padding on the axis
  ) +
  # Customize the x-axis scale
  scale_x_discrete(
    limits = 1:max(dataBoxplot$op_id),  # Limit x-axis to the range of operation IDs
    breaks = 1:max(dataBoxplot$op_id)  # Add breaks at each operation ID
  ) +
  # Apply a minimal theme for clean visualization
  theme_minimal()

# Simulate data for a boxplot
dataBoxplot <- data.frame(
  id = rep(1:47, 5),  # 47 subjects, repeated 5 times for each group
  times = rep(paste0("Month ", c(0, 3, 6, 9, 12)), each = 47),  # 5 time points
  val = c(  # Simulate values for each time point with different means and SDs
    rnorm(n = 47, mean = 2, sd = 1),  # Group 1 (mean=2, sd=1)
    rnorm(n = 47, mean = 5, sd = 1),  # Group 2 (mean=5, sd=1)
    rnorm(n = 47, mean = 5, sd = 2),  # Group 3 (mean=5, sd=2)
    rnorm(n = 47, mean = 8, sd = 2),  # Group 4 (mean=8, sd=2)
    rnorm(n = 47, mean = 8, sd = 5)   # Group 5 (mean=8, sd=5)
  )
)
dataBoxplot$val <- pmax(0, dataBoxplot$val)

# Perform ANOVA to test differences between the groups
res.aov <- anova_test(
  data = dataBoxplot,
  dv = val,     # Dependent variable: val (size)
  wid = id,     # Within-subjects factor: id
  between = times  # Between-subjects factor: times
)

# Pairwise t-tests to compare each group with group 1, adjusting for multiple comparisons using Bonferroni correction
pwc <- dataBoxplot %>%
  pairwise_t_test(
    val ~ times,  # Dependent variable 'val' across levels of 'times'
    paired = TRUE,  # Paired samples
    ref.group = "Month 0",  # Use "n°1" as the reference group
    p.adjust.method = "bonferroni"  # Apply Bonferroni correction to p-values
  ) %>%
  add_xy_position(x = "times")  # Add xy-position for displaying p-values

# Create the boxplot with individual data points and p-values
ggboxplot(
  dataBoxplot,  # Data for the boxplot
  x = "times",  # Group by 'times'
  y = "val",    # Plot 'val' (size) on the y-axis
  add = "point",  # Add individual data points to the boxplot
  ylab = "Size",  # Label for the y-axis
  xlab = ""      # No label for the x-axis
) +
  scale_y_continuous(
    limits = c(0, max(dataBoxplot$val)),
    expand = c(0, 0)
  ) +
  stat_pvalue_manual(pwc) +  # Add p-values from pairwise t-tests
  labs(
    subtitle = get_test_label(res.aov, detailed = FALSE),  # Subtitle for ANOVA result
    caption = "2 by 2 testing: Student's t-test, p-value adjustment: Bonferroni"  # Caption explaining test details
  )

2.2.2 Violins

ggstatsplot::ggbetweenstats(ISLR::Wage, education, wage, "np") +
  ylab("Workers raw wage") +
  xlab("Education level")

3 Models outputs

3.1 Survival curve

n <- 1000  # Number of individuals
df <- data.frame(
  id = 1:n,
  time = as.numeric(sample(1:365, n, replace = TRUE)),  # Simulated follow-up time between 1 and 365 days
  event = factor(sample(0:1, n, prob = c(0.4, 0.6), replace = TRUE)),  # Event (0 = censored, 1 = sick, 2 = dead)
  exposure = sample(c("Unexposed", "Exposed"), n, replace = TRUE)  # Exposure status (Unexposed or Exposed)
)
df$time[df$event == 0] <- 365  # Set time to 365 for censored observations (event == 0)

fit <- survfit(Surv(time = time, event = event) ~ exposure, data = df) # Fitting a Survival Model

ymax <- 30
autoplot(fit, surv.linetype = 'blank', surv.colour = NA) +
  geom_step(aes(linetype = strata, color = event), size = 0.8) +
  scale_linetype_manual(name = "Group",
                        values = c('dotted', 'solid'),
                        labels = c('Non-exposed', 'Exposed')) +
  scale_color_manual(name = "Event",
                     values = c("white", "#1C4073"),
                     labels = c(' ', "Hospitalised")) +
  labs(x = "Période de suivi (jours)", y = "Proportion", title = "") +
  geom_vline(aes(xintercept = 0), size = 1) +
  geom_hline(aes(yintercept = 1-ymax/100), size = 1) +
  scale_y_reverse(limits = 1-c(ymax/100, 1), breaks = 1-seq(ymax/100, 1, 0.1),
                  labels = paste0(seq(ymax, 100, 10), "%"), expand = c(0,0)) +
  scale_x_continuous(breaks = seq(0, 360, 60), expand = c(0,0)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(face="bold", colour="black", size = 12),
        axis.title.y = element_text(face="bold", colour="black", size = 12),
        legend.title = element_text(face="bold", size = 10),
        panel.grid.major.y = element_line(colour = "lightgray", size = 0.3),
        panel.grid.minor.x = element_blank(),
        legend.position = c(0.15, 0.30),
        legend.background = element_rect(fill = "transparent", color = "white"))

# Simulating survival data
n <- 1000  # Number of individuals
df <- data.frame(
  id = 1:n,
  time = as.numeric(sample(1:365, n, replace = TRUE)),  # Simulated follow-up time between 1 and 365 days
  event = factor(sample(0:2, n, prob = c(0.4, 0.5, 0.1), replace = TRUE)),  # Event (0 = censored, 1 = sick, 2 = dead)
  exposure = sample(c("Unexposed", "Exposed"), n, replace = TRUE)  # Exposure status (Unexposed or Exposed)
)
df$time[df$event == 0] <- 365  # Set time to 365 for censored observations (event == 0)

fit <- survfit(Surv(time = time, event = event) ~ exposure, data = df) # Fitting a Survival Model


# Plot a survival curve with `autoplot` and enhance it with additional layers
autoplot(fit, surv.linetype = 'blank', surv.colour = NA) + 
  # Add step lines representing survival curves
  geom_step(
    aes(linetype = strata, color = event), # Map line type to `strata` and color to `event`
    size = 0.8                             # Set line thickness
  ) +
  # Customize the line types for survival curves
  scale_linetype_manual(
    name = "Group",                         # Title for the legend
    values = c('dashed', 'solid'),          # Define line types: dashed for unexposed, solid for exposed
    labels = c('Unexposed', 'Exposed')      # Labels for the legend
  ) +
  # Customize the colors for the event states
  scale_color_manual(
    name = "State",                         # Title for the legend
    values = c("#AAC0AF", "#1C4073", "#B28B84"), # Colors for Healthy, Sick, and Dead states
    labels = c('Healthy', 'Sick', 'Dead')   # Labels for the legend
  ) +
  # Add axis labels and a title
  labs(
    x = "Followup period (days)",           # X-axis label
    y = "Proportion of the population",     # Y-axis label
    title = ""                              # Title (empty for now)
  ) +
  # Add a vertical reference line at x = 0
  geom_vline(
    aes(xintercept = 0), # Position of the line
    size = 1             # Thickness of the line
  ) +
  # Customize the y-axis scale
  scale_y_continuous(
    limits = c(0, 1),                      # Set the y-axis range from 0 to 1
    breaks = seq(0, 1, 0.1),               # Major ticks every 0.1
    labels = paste0(seq(0, 100, 10), "%"), # Convert tick labels to percentages
    expand = c(0, 0)                       # Remove padding on the y-axis
  ) +
  # Customize the x-axis scale
  scale_x_continuous(
    breaks = seq(0, 360, 60),              # Major ticks every 60 days
    expand = c(0, 0)                       # Remove padding on the x-axis
  ) +
  # Apply a minimal theme for a clean appearance
  theme_minimal() +
  # Customize specific theme elements
  theme(
    plot.title = element_text(hjust = 0.5),          # Center-align the plot title
    axis.title.x = element_text(
      face = "bold",                                 # Make x-axis title bold
      colour = "black",                              # Set title color to black
      size = 12                                      # Set font size
    ),
    axis.title.y = element_text(
      face = "bold",                                 # Make y-axis title bold
      colour = "black",                              # Set title color to black
      size = 12                                      # Set font size
    ),
    legend.title = element_text(face = "bold", size = 10), # Style legend titles
    panel.grid.major.y = element_line(
      colour = "lightgray",                          # Light gray major grid lines on the y-axis
      size = 0.3                                     # Set line thickness
    ),
    panel.grid.minor.x = element_blank()             # Remove minor grid lines on the x-axis
  )

5 Nested Data

5.1 Treemap

# Create a data frame containing cause of death, details (subcategory), and occurrences
dataTreemap <- data.frame(
  cause_of_death = c(
    "Cancer",
    rep("Non cancerous diseases", 6),
    rep("Road accidents", 2),
    rep("Other or unknown causes", 4),
    "Suicide"
  ),
  details = c(
    NA,                     # Subcategories for "Cancer" are not specified
    "Cardiac",              # Subcategories for "Non cancerous diseases"
    "Respiratory",
    "Mental disorders",
    "Digestive",
    "Endocrinian",
    "Other",
    "Car",                  # Subcategories for "Road accidents"
    "Other",
    "Weapons",              # Subcategories for "Other or unknown causes"
    "Poorly defined",
    "Undefined",
    "Other",
    NA                      # Subcategories for "Suicide" are not specified
  ),
  n = c(62, 19, 2, 1, 3, 2, 8, 7, 26, 5, 14, 8, 8, 52) # Occurrences for each category/subcategory
)

# Add the total count (n) for each cause_of_death to its label
dataTreemap <- as.data.frame(t(apply(dataTreemap, 1, function(x) {
  x[1] <- paste0(
    x[1], " (n=",                             # Append the total count to the cause_of_death label
    sum(dataTreemap$n[dataTreemap$cause_of_death == x[1]]), 
    ")"
  )
  x
})))
dataTreemap$n <- as.numeric(dataTreemap$n)    # Convert the n column back to numeric after transformation

# Generate the treemap
treemap(
  dataTreemap,
  index = c("cause_of_death", "details"),      # Define hierarchical levels for the treemap
  vSize = "n",                                # Size of rectangles corresponds to the n column
  type = "index",                             # Color rectangles based on the index
  algorithm = "pivotSize",                    # Use pivot size layout algorithm for better space optimization
  title = "",                                 # No title for the treemap
  align.labels = list(c("left", "top"), c("left", "bottom")), # Align labels in blocks
  palette = "Pastel1",                        # Use pastel colors for the treemap
  fontsize.title = 22,                        # Title font size
  fontcolor.labels = "black",                 # Set label font color to black
  bg.labels = 0,                              # Transparent background for labels
  aspRatio = 1                                # Set aspect ratio to make the blocks square
)